941610
@@ -18,6 +18,9 @@
 
 package org.apache.hadoop.hive.ql.lib;
 
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
 import java.util.Stack;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -31,7 +34,54 @@
 public class RuleRegExp implements Rule {
 
   private final String ruleName;
-  private final Pattern pattern;
+  private final Pattern patternWithWildCardChar;   // set only for general regular expressions
+  private final String patternWithoutWildCardChar; // set only for plain literal rules
+  private final String[] patternORWildChar;        // set only when '|' is the sole wild card
+  private static final Set<Character> wildCards = new HashSet<Character>(Arrays.asList('[', '^',
+    '$', '*', ']', '+', '|', '(', '\\', '.', '?', ')', '&', '{', '}')); // braces: x{n} quantifiers
+
+  /**
+   * Scans the characters of the given regular expression and reports whether
+   * any of them is a member of {@code wildCards}.
+   *
+   * @param pattern pattern expressed as a regular expression; may be null
+   * @return true if a wild card character is present; false if none is or pattern is null
+   */
+  private static boolean patternHasWildCardChar(String pattern) {
+    if (pattern == null) {
+      return false;
+    }
+    for (char pc : pattern.toCharArray()) {
+      if (wildCards.contains(pc)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Scans the characters of the given regular expression and reports whether
+   * {@code wcc} is the only member of {@code wildCards} that occurs in it.
+   *
+   * @param pattern pattern expressed as a regular expression; may be null
+   * @param wcc the single wild card character that is allowed to occur
+   * @return true if pattern contains at least one wild card character and every wild card
+   *         character in it equals wcc; false otherwise, including when pattern is null
+   */
+  private static boolean patternHasOnlyWildCardChar(String pattern, char wcc) {
+    if (pattern == null) {
+      return false;
+    }
+    boolean ret = true;
+    boolean hasWildCard = false;
+    for (char pc : pattern.toCharArray()) {
+      if (wildCards.contains(pc)) {
+        hasWildCard = true;
+        ret = ret && (pc == wcc);
+      }
+    }
+    return ret && hasWildCard;
+  }
 
   /**
    * The rule specified by the regular expression. Note that, the regular
@@ -46,32 +96,155 @@
    **/
   public RuleRegExp(String ruleName, String regExp) {
     this.ruleName = ruleName;
-    pattern = Pattern.compile(regExp);
+    // Classify regExp once; cost() later dispatches on which of the three fields is non-null.
+    if (patternHasWildCardChar(regExp)) {
+      if (patternHasOnlyWildCardChar(regExp, '|')) { // only literal alternatives: split them
+          this.patternWithWildCardChar = null;
+          this.patternWithoutWildCardChar = null;
+          this.patternORWildChar = regExp.split("\\|");
+      } else { // general regular expression: compile it
+        this.patternWithWildCardChar = Pattern.compile(regExp);
+        this.patternWithoutWildCardChar = null;
+        this.patternORWildChar = null;
+      }
+    } else { // no wild card character at all: keep the literal text
+      this.patternWithWildCardChar = null;
+      this.patternWithoutWildCardChar = regExp;
+      this.patternORWildChar = null;
+    }
   }
 
   /**
-   * This function returns the cost of the rule for the specified stack. Lower
-   * the cost, the better the rule is matched
-   * 
+   * This function returns the cost of the rule for the specified stack when the pattern
+   * matched for has no wildcard character in it. The function expects patternWithoutWildCardChar
+   * to be not null.
    * @param stack
    *          Node stack encountered so far
    * @return cost of the function
    * @throws SemanticException
    */
-  @Override
-  public int cost(Stack<Node> stack) throws SemanticException {
+  private int costPatternWithoutWildCardChar(Stack<Node> stack) throws SemanticException {
     int numElems = (stack != null ? stack.size() : 0);
+    String name = "";
+    int patLen = patternWithoutWildCardChar.length();
+    // The path built from the top of the stack only grows, so the first length check decides.
+    for (int pos = numElems - 1; pos >= 0; pos--) {
+      name = stack.get(pos).getName() + "%" + name;
+      if (name.length() >= patLen) {
+        if (patternWithoutWildCardChar.equals(name)) {
+          return patLen;
+        } else {
+          return -1;
+        }
+      }
+    }
+    return -1;
+  }
+
+  /**
+   * This function returns the cost of the rule for the specified stack when the pattern
+   * consists only of literal alternatives separated by '|'. The function expects
+   * patternORWildChar to be not null.
+   * @param stack
+   *          Node stack encountered so far
+   * @return length of the shortest alternative equal to a path built from the top of the
+   *         stack, or -1 if no alternative matches
+   */
+  private int costPatternWithORWildCardChar(Stack<Node> stack) throws SemanticException {
+    int numElems = (stack != null ? stack.size() : 0);
+    // Keep the cheapest hit, as regex matching would: it stops at the shortest matching path.
+    int minCost = -1;
+    for (String pattern : patternORWildChar) {
+      String name = "";
+      int patLen = pattern.length();
+      for (int pos = numElems - 1; pos >= 0; pos--) {
+        name = stack.get(pos).getName() + "%" + name;
+        if (name.length() >= patLen) {
+          if (pattern.equals(name) && (minCost < 0 || patLen < minCost)) {
+            minCost = patLen;
+          }
+          break;
+        }
+      }
+    }
+    return minCost;
+  }
+
+  /**
+   * This function returns the cost of the rule for the specified stack when the pattern
+   * matched for has wildcard character in it. The function expects patternWithWildCardChar
+   * to be not null.
+   *
+   * @param stack
+   *          Node stack encountered so far
+   * @return length of the shortest path from the top of the stack the pattern matches, or -1
+   * @throws SemanticException
+   */
+  private int costPatternWithWildCardChar(Stack<Node> stack) throws SemanticException {
+    int numElems = (stack != null ? stack.size() : 0);
     String name = "";
+    Matcher m = patternWithWildCardChar.matcher(""); // one Matcher, reset for every candidate
     for (int pos = numElems - 1; pos >= 0; pos--) {
       name = stack.get(pos).getName() + "%" + name;
-      Matcher m = pattern.matcher(name);
+      m.reset(name);
       if (m.matches()) {
-        return m.group().length();
+        return name.length(); // matches() spans the whole input, so this is the match length
       }
     }
     return -1;
   }
 
+  /**
+   * Returns true if only patternWithWildCardChar is set, i.e. the rule is a compiled regex.
+   */
+  boolean rulePatternIsValidWithWildCardChar() {
+    return patternWithoutWildCardChar == null && patternWithWildCardChar != null && this.patternORWildChar == null;
+  }
+
+  /**
+   * Returns true if only patternWithoutWildCardChar is set, i.e. the rule is a plain literal.
+   */
+  boolean rulePatternIsValidWithoutWildCardChar() {
+    return patternWithWildCardChar == null && patternWithoutWildCardChar != null && this.patternORWildChar == null;
+  }
+
+  /**
+   * Returns true if only patternORWildChar is set, i.e. the rule is a list of literal alternatives.
+   */
+  boolean rulePatternIsValidWithORWildCardChar() {
+    return patternWithoutWildCardChar == null && patternWithWildCardChar == null && this.patternORWildChar != null;
+  }
+
+  /**
+   * This function returns the cost of the rule for the specified stack. Lower
+   * the cost, the better the rule is matched
+   *
+   * @param stack
+   *          Node stack encountered so far
+   * @return cost computed by the matching strategy selected for this rule, -1 if no match
+   * @throws SemanticException if the pattern fields are in an inconsistent state
+   */
+  @Override
+  public int cost(Stack<Node> stack) throws SemanticException {
+    if (rulePatternIsValidWithoutWildCardChar()) {
+      return costPatternWithoutWildCardChar(stack);
+    }
+    if (rulePatternIsValidWithWildCardChar()) {
+      return costPatternWithWildCardChar(stack);
+    }
+    if (rulePatternIsValidWithORWildCardChar()) {
+      return costPatternWithORWildCardChar(stack);
+    }
+    // If we reached here, the three pattern fields are not in a valid state, i.e. it is
+    // not the case that exactly one of them is non-null. This is an internal error and
+    // we should not let this happen, so throw an exception that reports all three.
+    throw new SemanticException (
+      "Rule pattern is invalid for " + getName() + " : patternWithWildCardChar = " +
+      patternWithWildCardChar + " patternWithoutWildCardChar = " +
+      patternWithoutWildCardChar + " patternORWildChar = " +
+      Arrays.toString(patternORWildChar));
+  }
+
   /**
    * @return the name of the Node
    **/
